from IPython.display import Image
# Render the analysis-process diagram inline.
# NOTE(review): the path is absolute and machine-specific; the cell only works
# where this file exists.
Image(
    r"C:\Users\acer\Desktop\data analysis process.png",
    width=300,
    height=200,
)
# Import libraries for data analysis
import pandas as pd
import numpy as np
# Import libraries for data visualization
from matplotlib import pyplot as plt
import plotly.express as px
# Load the district-level case counts from the CSV in the working directory
# and preview the first five rows.
data = pd.read_csv(filepath_or_buffer="districts cases 2_4_2021.csv")
data.head(n=5)
| | Districts | positive_cases | active_cases | recoverd | deaths | recovery_rate(%) | deaths_rate(%) |
|---|---|---|---|---|---|---|---|
| 0 | Ahmednagar | 94044 | 9103 | 83728.0 | 1212 | 89.0 | 1.3 |
| 1 | Akola | 28368 | 4050 | 23850.0 | 464 | 84.1 | 1.6 |
| 2 | Amravati | 49079 | 2935 | 45494.0 | 648 | 92.7 | 1.3 |
| 3 | Aurangabad | 83917 | 19466 | 63095.0 | 1342 | 75.2 | 1.6 |
| 4 | Beed | 25985 | 4139 | 21223.0 | 614 | 81.7 | 2.4 |
# Dimensions of the loaded table as (rows, columns).
data.shape
(36, 7)
# Count the missing (NaN) entries in each column.
data.isna().sum()
Districts 0 positive_cases 0 active_cases 0 recoverd 1 deaths 0 recovery_rate(%) 1 deaths_rate(%) 0 dtype: int64
# Find the exact location of the missing values.
# `null` holds the labels of every column that contains at least one NaN.
null = data.columns[data.isnull().any()]
# Show every row that has a NaN in ANY column (not only in 'recoverd'),
# restricted to the affected columns, so no missing cell is overlooked.
print(data.loc[data.isnull().any(axis=1), null])
recoverd recovery_rate(%) 22 NaN NaN
# Inspect the full record of the row reported above as holding the NaN values.
data.iloc[22] # iloc selects a single row by its integer position
Districts Other States/Country positive_cases 146 active_cases 48 recoverd NaN deaths 96 recovery_rate(%) NaN deaths_rate(%) 65.8 Name: 22, dtype: object
# Replace every NaN with 0 and confirm that no missing value is left.
# The frame is kept intact (no column-wise .sum()), so the check below really
# inspects each cell instead of a handful of column totals.
remove_null = data.fillna(0)
# Total number of NaN cells remaining across the whole table (expected: 0).
remove_null.isnull().sum().sum()
0
# Drop the "Other States/Country" row, the only row with missing values.
# The row is looked up by its 'Districts' value instead of the hard-coded
# index label 22, so the correct row is removed even if the CSV order changes.
other_rows = data.index[data['Districts'].str.strip() == 'Other States/Country']
clean_data = data.drop(other_rows)
clean_data.head(25)
| | Districts | positive_cases | active_cases | recoverd | deaths | recovery_rate(%) | deaths_rate(%) |
|---|---|---|---|---|---|---|---|
| 0 | Ahmednagar | 94044 | 9103 | 83728.0 | 1212 | 89.0 | 1.3 |
| 1 | Akola | 28368 | 4050 | 23850.0 | 464 | 84.1 | 1.6 |
| 2 | Amravati | 49079 | 2935 | 45494.0 | 648 | 92.7 | 1.3 |
| 3 | Aurangabad | 83917 | 19466 | 63095.0 | 1342 | 75.2 | 1.6 |
| 4 | Beed | 25985 | 4139 | 21223.0 | 614 | 81.7 | 2.4 |
| 5 | Bhandara | 17875 | 2477 | 15076.0 | 320 | 84.3 | 1.8 |
| 6 | Buldhana | 27768 | 3951 | 23527.0 | 285 | 84.7 | 1.0 |
| 7 | Chandrapur | 29016 | 2355 | 26215.0 | 444 | 90.3 | 1.5 |
| 8 | Dhule | 26031 | 5100 | 20562.0 | 365 | 79.0 | 1.4 |
| 9 | Gadchiroli | 10062 | 414 | 9530.0 | 110 | 94.7 | 1.1 |
| 10 | Gondia | 16026 | 869 | 14969.0 | 182 | 93.4 | 1.1 |
| 11 | Hingoli | 7007 | 1912 | 4995.0 | 100 | 71.3 | 1.4 |
| 12 | Jalgaon | 85443 | 6464 | 77358.0 | 1596 | 90.5 | 1.9 |
| 13 | Jalna | 23006 | 675 | 21920.0 | 410 | 95.3 | 1.8 |
| 14 | Kolhapur | 51301 | 731 | 48873.0 | 1694 | 95.3 | 3.3 |
| 15 | Latur | 33226 | 5545 | 26929.0 | 748 | 81.0 | 2.3 |
| 16 | Mumbai | 414773 | 49953 | 352173.0 | 11690 | 84.9 | 2.8 |
| 17 | Nagpur | 230187 | 46333 | 179950.0 | 3859 | 78.2 | 1.7 |
| 18 | Nanded | 43245 | 12268 | 30199.0 | 772 | 69.8 | 1.8 |
| 19 | Nandurbar | 18032 | 4525 | 13215.0 | 291 | 73.3 | 1.6 |
| 20 | Nashik | 178997 | 33442 | 143332.0 | 2222 | 80.1 | 1.2 |
| 21 | Osmanabad | 21395 | 2327 | 18464.0 | 587 | 86.3 | 2.7 |
| 23 | Palghar | 54072 | 3377 | 49711.0 | 974 | 91.9 | 1.8 |
| 24 | Parbhani | 14152 | 4560 | 9227.0 | 354 | 65.2 | 2.5 |
| 25 | Pune | 536262 | 64277 | 463611.0 | 8325 | 86.5 | 1.6 |
# Count the rows of the cleaned table that are exact duplicates of an earlier row.
clean_data.duplicated().sum()
0
# State-wide totals: add up the per-district positive and active case counts.
positive_total = clean_data['positive_cases'].sum()
active_total = clean_data['active_cases'].sum()
# Show both totals, one per output line.
display(positive_total, active_total)
2812834
356195
# Report the two state-wide totals in a single sentence.
print(
    "The number of Positive_cases in Maharashtra is {} And Number of Active_cases is {}".format(
        positive_total,
        active_total,
    )
)
The number of Positive_cases in Maharashtra is 2812834 And Number of Active_cases is 356195
# Total recoveries, and their share of all positive cases as a percentage.
recoverd_cases = clean_data['recoverd'].sum()
recovery_rate = recoverd_cases / positive_total * 100
# Show the raw total followed by the unrounded percentage.
display(recoverd_cases, recovery_rate)
2400727.0
85.34904654878318
# Report the state-wide recovery rate rounded to two decimals.
print(
    "The Recovery_Rate in Maharashtra is {:.2f}%".format(
        recovery_rate,
    )
)
The Recovery_Rate in Maharashtra is 85.35%
# Total deaths, and their share of all positive cases as a percentage.
death_cases = clean_data['deaths'].sum()
death_rate = death_cases / positive_total * 100
# Show the raw total followed by the unrounded percentage.
display(death_cases, death_rate)
54553
1.9394319039090113
# Report the state-wide death rate rounded to one decimal.
print(
    "The Death_Rate in Maharashtra is {:.1f}%".format(
        death_rate,
    )
)
The Death_Rate in Maharashtra is 1.9%
# Render the cross-check screenshot inline.
# NOTE(review): the path is absolute and machine-specific; the cell only works
# where this file exists.
Image(
    r"C:\Users\acer\Desktop\cross check data.png",
    width=900,
    height=500,
)
# Grouped bar chart of the numeric columns, one group of bars per district.
fig_dims = (10, 10)
fig, ax = plt.subplots(figsize=fig_dims)
name = ['Districts']
# Pass the district column as the x axis; without it pandas labels each group
# with the integer row index instead of the district name.
clean_data.plot.bar(x=name[0], ax=ax)
plt.show()
# List the column labels of the cleaned table.
clean_data.columns
Index(['Districts', 'positive_cases', 'active_cases', 'recoverd', 'deaths',
'recovery_rate(%)', 'deaths_rate(%)'],
dtype='object')
# Positive cases summed per district; the result is a Series indexed by district name.
group_positive = clean_data['positive_cases'].groupby(clean_data['Districts']).sum()
group_positive.head(n=5)
Districts Ahmednagar 94044 Akola 28368 Amravati 49079 Aurangabad 83917 Beed 25985 Name: positive_cases, dtype: int64
# Interactive vertical bar chart of the per-district positive-case totals.
px.bar(
    group_positive,
    orientation='v',
    title='positive cases in Maharashtra',
)
# Keep only the districts that recorded more than 2000 deaths.
top_deaths = clean_data.loc[clean_data['deaths'].gt(2000)]
top_deaths
| | Districts | positive_cases | active_cases | recoverd | deaths | recovery_rate(%) | deaths_rate(%) |
|---|---|---|---|---|---|---|---|
| 16 | Mumbai | 414773 | 49953 | 352173.0 | 11690 | 84.9 | 2.8 |
| 17 | Nagpur | 230187 | 46333 | 179950.0 | 3859 | 78.2 | 1.7 |
| 20 | Nashik | 178997 | 33442 | 143332.0 | 2222 | 80.1 | 1.2 |
| 25 | Pune | 536262 | 64277 | 463611.0 | 8325 | 86.5 | 1.6 |
| 32 | Thane | 339590 | 39692 | 293897.0 | 5970 | 86.5 | 1.8 |
# Deaths per high-mortality district, as a Series indexed by district name.
top_5 = top_deaths['deaths'].groupby(top_deaths['Districts']).sum()
top_5
Districts Mumbai 11690 Nagpur 3859 Nashik 2222 Pune 8325 Thane 5970 Name: deaths, dtype: int64
# Static pie chart of each district's share of the deaths in `top_5`,
# with every wedge labelled by its percentage to one decimal place.
top_5.plot.pie(
    autopct='%0.1f%%',
    radius=2,
    subplots=True,
)
plt.show()
# Interactive pie chart of the same death counts.
# Take the wedge labels from the Series index instead of a hand-typed list, so
# labels always line up with the values even if the selection of districts changes.
legend = list(top_5.index)
fig = px.pie(top_5, values='deaths', names=legend, title='Deaths cases in Maharasthra')
fig.show()
# Pivot to one row per district holding its active-case count
# (pivot_table's default aggregation is the mean).
top_active = clean_data.pivot_table(values='active_cases', index='Districts')
top_active.head(n=5)
| | active_cases |
|---|---|
| Districts | |
| Ahmednagar | 9103 |
| Akola | 4050 |
| Amravati | 2935 |
| Aurangabad | 19466 |
| Beed | 4139 |
# Interactive line chart of active cases across the districts.
px.line(
    top_active,
    orientation='v',
    title='Line Graph of Active Cases in Maharashrta',
)
# Scatter plot of recoveries against positive cases, one coloured point per district.
fig = px.scatter(
    data_frame=clean_data,
    x='positive_cases',
    y='recoverd',
    color='Districts',
    orientation='v',
    title='positive cases over recoverd',
)
fig.show()
# Narrow the cleaned table to the district name and its two percentage columns.
rate_columns = ['Districts', 'deaths_rate(%)', 'recovery_rate(%)']
rate = clean_data[rate_columns]
rate
| | Districts | deaths_rate(%) | recovery_rate(%) |
|---|---|---|---|
| 0 | Ahmednagar | 1.3 | 89.0 |
| 1 | Akola | 1.6 | 84.1 |
| 2 | Amravati | 1.3 | 92.7 |
| 3 | Aurangabad | 1.6 | 75.2 |
| 4 | Beed | 2.4 | 81.7 |
| 5 | Bhandara | 1.8 | 84.3 |
| 6 | Buldhana | 1.0 | 84.7 |
| 7 | Chandrapur | 1.5 | 90.3 |
| 8 | Dhule | 1.4 | 79.0 |
| 9 | Gadchiroli | 1.1 | 94.7 |
| 10 | Gondia | 1.1 | 93.4 |
| 11 | Hingoli | 1.4 | 71.3 |
| 12 | Jalgaon | 1.9 | 90.5 |
| 13 | Jalna | 1.8 | 95.3 |
| 14 | Kolhapur | 3.3 | 95.3 |
| 15 | Latur | 2.3 | 81.0 |
| 16 | Mumbai | 2.8 | 84.9 |
| 17 | Nagpur | 1.7 | 78.2 |
| 18 | Nanded | 1.8 | 69.8 |
| 19 | Nandurbar | 1.6 | 73.3 |
| 20 | Nashik | 1.2 | 80.1 |
| 21 | Osmanabad | 2.7 | 86.3 |
| 23 | Palghar | 1.8 | 91.9 |
| 24 | Parbhani | 2.5 | 65.2 |
| 25 | Pune | 1.6 | 86.5 |
| 26 | Raigad | 2.0 | 92.4 |
| 27 | Ratnagiri | 3.3 | 92.3 |
| 28 | Sangli | 3.3 | 92.9 |
| 29 | Satara | 2.9 | 92.0 |
| 30 | Sindhudurg | 2.5 | 90.7 |
| 31 | Solapur | 2.8 | 88.9 |
| 32 | Thane | 1.8 | 86.5 |
| 33 | Wardha | 1.8 | 89.3 |
| 34 | Washim | 1.2 | 81.5 |
| 35 | Yavatmal | 1.9 | 83.3 |
# Scatter plot of recovery rate against death rate, one coloured point per district.
fig = px.scatter(
    data_frame=rate,
    x='deaths_rate(%)',
    y='recovery_rate(%)',
    color='Districts',
    orientation='v',
    title='death_rate(%) And recovery_rate(%)',
)
fig.show()